package com.anmai.crawler1.utils;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;

/**
 * Static helpers around jsoup for fetching a URL as raw text, as a parsed
 * {@link Document}, or as a pre-configured {@link Connection}.
 *
 * <p>Every connection built here ignores the response content type, so
 * non-HTML responses are accepted as well.
 *
 * Created by CJ on 2018-03-29.
 */
public class JsoupUtils {

    private final static Logger logger = LoggerFactory.getLogger(JsoupUtils.class);

    /**
     * Not read or written anywhere in this class; kept only because it is part
     * of the public surface and other code may reference it.
     */
    public static Connection conn;

    /** Pause, in milliseconds, before a retry and before handing out a connection. */
    private static final long PAUSE_MILLIS = 500L;

    /** Timeout for the first attempt of {@link #getHtml(String)}. */
    private static final int HTML_FIRST_TIMEOUT_MILLIS = 20000;

    /** Timeout for {@link #getDocument(String)} and for the retry of {@link #getHtml(String)}. */
    private static final int DEFAULT_TIMEOUT_MILLIS = 8000;

    /**
     * Timeout applied by {@link #getConnection(String)}.
     * NOTE(review): 200000 ms is 10x the largest timeout used elsewhere in this
     * class - possibly a typo for 20000; value kept as-is to preserve behavior.
     */
    private static final int CONNECTION_TIMEOUT_MILLIS = 200000;

    /**
     * Fetches the response body of {@code url} as a string.
     *
     * <p>If the first attempt fails with an {@link IOException}, the failure is
     * logged and the request is retried once after a short pause, using a
     * shorter timeout.
     *
     * @param url the address to request
     * @return the response body
     * @throws IOException if the retry fails as well
     */
    public static String getHtml(String url) throws IOException {
        try {
            return open(url, HTML_FIRST_TIMEOUT_MILLIS).execute().body();
        } catch (IOException e) {
            logger.warn("First attempt to fetch body of " + url + " failed; retrying once", e);
            pause();
            return open(url, DEFAULT_TIMEOUT_MILLIS).execute().body();
        }
    }

    /**
     * Fetches {@code url} with a GET request and returns the parsed document.
     *
     * <p>If the first attempt fails with an {@link IOException}, the failure is
     * logged and the request is retried once after a short pause.
     *
     * @param url the address to request
     * @return the parsed document
     * @throws IOException if the retry fails as well
     */
    public static Document getDocument(String url) throws IOException {
        try {
            return open(url, DEFAULT_TIMEOUT_MILLIS).get();
        } catch (IOException e) {
            logger.warn("First attempt to fetch document " + url + " failed; retrying once", e);
            pause();
            return open(url, DEFAULT_TIMEOUT_MILLIS).get();
        }
    }

    /**
     * Returns a connection for {@code url} that has not been executed yet, so
     * the caller can configure it further (for example request headers) before
     * sending the request.
     *
     * <p>The method pauses briefly before creating the connection.
     *
     * @param url the address the connection will target
     * @return a connection with the timeout set and content-type checking disabled
     * @throws IOException never thrown by the current implementation; the clause
     *                     is retained so existing callers keep compiling
     */
    public static Connection getConnection(String url) throws IOException {
        pause();
        return open(url, CONNECTION_TIMEOUT_MILLIS);
    }

    /** Builds a connection for {@code url} with the given timeout that accepts any content type. */
    private static Connection open(String url, int timeoutMillis) {
        return Jsoup.connect(url)
                .timeout(timeoutMillis)
                .ignoreContentType(true);
    }

    /**
     * Sleeps for {@link #PAUSE_MILLIS}. If the thread is interrupted, the
     * interrupt status is restored so callers further up the stack can still
     * observe it, and the method returns immediately; the calling method then
     * proceeds exactly as it did before (best-effort behavior is kept).
     */
    private static void pause() {
        try {
            Thread.sleep(PAUSE_MILLIS);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            logger.warn("Interrupted while pausing before request", e);
        }
    }

}
